# Final Exhibit - function for tables 2-3 ---------------------------------------
# Row-count weights per (month, rounded-probability) cell.
#
# dt        : data.table in which every row has DMG_died_within_365d == "1";
#             must contain DMG_died_within_365d and prob_for_report, and the
#             grouping column month_from_index.
# agg_level : numeric bin width, strictly between 0 and 1; prob_for_report is
#             rounded UP to the nearest multiple of it.
#
# Returns a data.table with columns month_from_index, round_prob and w, where
# w is the number of rows in the cell divided by the total number of rows in
# dt (so the weights sum to 1).
get_weights_by_phat_byN_month <- function(dt,
                                          agg_level) {
  # validate inputs before doing any work
  stopifnot(c("DMG_died_within_365d", "prob_for_report") %in% names(dt))
  stopifnot(nrow(dt[DMG_died_within_365d == "1"]) == nrow(dt))
  stopifnot(is.numeric(agg_level) & agg_level > 0 & agg_level < 1)

  # denominator shared by every cell
  n_rows_total <- nrow(dt)

  # rows per cell, in order of first appearance
  cell_counts <- dt[, .N,
                    by = .(month_from_index,
                           round_prob = plyr::round_any(prob_for_report,
                                                        agg_level,
                                                        ceiling))]

  # turn the counts into shares of the whole table
  cell_counts[, .(month_from_index, round_prob, w = N / n_rows_total)]
}

# Weights per (month, rounded-probability) cell, proportional to the sum of a
# chosen column instead of the row count.
#
# dt        : data.table in which every row has DMG_died_within_365d == "1";
#             must contain DMG_died_within_365d, prob_for_report and the column
#             named by w_var, plus the grouping column month_from_index.
# agg_level : numeric bin width, strictly between 0 and 1; prob_for_report is
#             rounded UP to the nearest multiple of it.
# w_var     : name (single string) of the numeric column whose per-cell sum
#             defines the weight.
# col_name  : name (single string) given to the weight column in the result.
#
# Returns a data.table with columns month_from_index, round_prob and
# <col_name>, where the weight is sum(w_var in cell) / sum(w_var over dt).
get_weights_by_phat_adj_month <- function(dt,
                                    agg_level,
                                    w_var = "num_days_lived",
                                    col_name = "w") {
  # check input parameters
  stopifnot(is.character(w_var), length(w_var) == 1L)
  stopifnot(is.character(col_name), length(col_name) == 1L)
  stopifnot(c("DMG_died_within_365d", "prob_for_report", w_var) %in% names(dt))
  stopifnot(nrow(dt[DMG_died_within_365d == "1"]) == nrow(dt))
  stopifnot(is.numeric(agg_level) & agg_level > 0 & agg_level < 1)

  # overall total of the weighting column; `[[` does an exact lookup by name,
  # so column names that are not valid R identifiers work and no text is
  # parsed/evaluated as code
  total_weight <- sum(dt[[w_var]])

  # share of the total carried by each (month, rounded probability) cell
  weights <-
    dt[, .(w = sum(get(w_var)) / total_weight),
       by = .(month_from_index,
              round_prob = plyr::round_any(prob_for_report, agg_level, ceiling))]
  data.table::setnames(weights, "w", col_name)
  return(weights)
}

# Build the monthly admissions-statistics exhibit (Table 3 / Table A10) and
# write it to a LaTeX file.
#
# Three panels are computed, each for the rows "any_adm", "Low" and "High"
# intensity, and each for drop_last_adm = TRUE and FALSE:
#   A. admission rate   : share of patient-months with at least one record;
#   B. admission numbers: records per 31 days lived, among patient-months
#                         that have at least one record;
#   C. length of stay   : sum(event_date_end - cost_date, in days) divided by
#                         the number of records.
# Within a panel the columns are "0" (DMG_died_within_365d == "0",
# unweighted), "V1" (the "0" group reweighted with the decedent weights per
# month x rounded-probability cell) and "1" (DMG_died_within_365d == "1").
#
# Arguments
#   dt_exh_month  : monthly patient panel (data.table). Columns used here:
#                   S_teudat_zehut_XX, month_from_index, prob_for_report,
#                   CNRS_topo_main_groups, full_month_to_death,
#                   DMG_date_of_death_XX, S_index_date_XX, died_first_year.
#                   NOTE(review): the default calls build_dyn_new(con = con),
#                   so an object `con` must be reachable from this function's
#                   enclosing environment - confirm.
#   dt_cost_temp  : cost / event records (data.table). Columns used here:
#                   main_cat, S_teudat_zehut_XX, cost_date, event_date_end,
#                   profession, DMG_died_within_365d, DMG_date_of_death_XX.
#   sample_n      : label stored in the `sample` column and embedded in the
#                   output file name.
#   round_par     : bin width used to round prob_for_report up before weighting.
#   low_intensity : values of `profession` classified as "Low" intensity;
#                   every other value is classified as "High".
#
# NOTE(review): `days_int` is used in the drop-last-admission filters but is
# neither an argument nor defined here; it has to be a column of dt_cost_temp
# or an object in the enclosing environment - confirm which.
#
# Side effect: writes "FE_adm_stat_agg_<sample_n>month.tex" via Hmisc::latex().
# Relies on dplyr verbs (bind_rows, slice, mutate, across, everything) and the
# %>% pipe being attached by the calling script.
#
# Returns the formatted table (columns `0_FALSE`, `V1_FALSE`, `1_FALSE`, diff)
# as character values with one decimal; the first three rows are multiplied by
# 100 before formatting. The drop_last_adm = TRUE results (`1_TRUE`) are
# computed but dropped before the table is written or returned.
do_table_3_month <- function(dt_exh_month = build_dyn_new(con = con),
                       dt_cost_temp = dt_cost_for_exhibits_cancer,
                       sample_n = "cancer",
                       round_par = 0.05,
                       low_intensity = c("oncology","internal_medicine",
                                         "geriatry","rehabilitation")) {
  # V  Final Exhibit - Table 3 table A10 : Admissions statistics -------------------------------

  # work with initial phats: replace each row's prob_for_report by the
  # patient's value at month_from_index == 0
  dt_exh_temp <- merge(x = dt_exh_month[, -c("prob_for_report", "CNRS_topo_main_groups")],
                       y = dt_exh_month[month_from_index == 0,
                                        c("S_teudat_zehut_XX", "prob_for_report")],
                       by = "S_teudat_zehut_XX",
                       all.x = TRUE,
                       all.y = FALSE)

  # days lived in the month: days from the month's index date to death (+1)
  # when full_month_to_death == 0, otherwise 30; NA and values above 30 are
  # set to 30
  dt_exh_temp[, days_lived := ifelse(full_month_to_death == 0,
                                     as.numeric(difftime(DMG_date_of_death_XX,
                                                         S_index_date_XX,
                                                         units = "days")) + 1,
                                     30)][
    is.na(full_month_to_death), days_lived := 30][
    days_lived > 30, days_lived := 30]

  # end of each patient-month = day before the patient's next row's index date
  # (presumably rows are ordered by month within patient - verify); the last
  # row of a patient has no next row and is closed at the date of death for
  # decedents, or 30 days after the index date otherwise.
  # NOTE(review): ifelse() returns a plain number, not a date-time; this
  # relies on date_end_month already being a date-time column whose class is
  # kept by := - confirm S_index_date_XX is POSIXct.
  dt_exh <- dt_exh_temp[, .(S_teudat_zehut_XX,
                            S_index_date_XX,
                            num_days_lived = days_lived,
                            month_from_index,
                            DMG_died_within_365d = died_first_year,
                            DMG_date_of_death_XX,
                            prob_for_report)][
    , date_end_month := data.table::shift(S_index_date_XX,
                                          n = 1,
                                          type = "lead") - lubridate::days(1),
    by = S_teudat_zehut_XX][
    is.na(date_end_month),
    date_end_month := ifelse(DMG_died_within_365d == "1",
                             as.POSIXct(DMG_date_of_death_XX),
                             as.POSIXct(S_index_date_XX) + lubridate::days(30))]

  # attach every selected cost record to the patient-month whose
  # [index date, end date] window contains its cost_date
  dt_cost <- merge(x = dt_cost_temp[main_cat %in% c("Inpatient_Planned", "Inpatient_Unplanned", "Low", "High"), ],
                   y = dt_exh[, .(S_teudat_zehut_XX,
                                  S_month_index_date_XX = S_index_date_XX,
                                  date_end_month,
                                  month_from_index)],
                   by = "S_teudat_zehut_XX",
                   all = TRUE,
                   allow.cartesian = TRUE)[cost_date >= S_month_index_date_XX & cost_date <= date_end_month]

  # accumulators, one per panel, filled across the two drop_last_adm passes
  tab_adm_rate_agg <- NULL
  tab_adm_num_agg  <- NULL
  tab_adm_LOS_agg <- NULL

  for (drop_last_adm in c(TRUE, FALSE)) {

    low_inten <- low_intensity

    # Additional analyses - Admission rates - agg -------------------------------a
    tab_adm_rate_agg_temp <- NULL

    # one row per patient-month with logical flags Low / High; when
    # drop_last_adm is TRUE, decedents' records ending within days_int days of
    # death are excluded
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(S_teudat_zehut_XX, month_from_index,
                     DMG_died_within_365d, prob_for_report)],
      y = data.table::dcast.data.table(
        data = dt_cost[(drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                          difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .(exist = (.N > 0)),
                       by = .(S_teudat_zehut_XX,
                              month_from_index,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = S_teudat_zehut_XX + month_from_index ~ intensity,
        value.var = "exist",
        fill = FALSE
      ),
      by = c("S_teudat_zehut_XX", "month_from_index"),
      all.x = TRUE,
      all.y = FALSE
    )
    # patient-months without any record come out of the merge as NA -> FALSE
    cols_NA2F_agg <- grep(("Low|High"),
                          names(dt_temp_adm_any),
                          value = TRUE)
    dt_temp_adm_any[, (cols_NA2F_agg) := lapply(.SD, function(x) {ifelse(is.na(x), FALSE, x)}),
                    .SDcols = cols_NA2F_agg]
    dt_temp_adm_any[, any_adm := (Low == TRUE | High == TRUE)]

    # add the decedent row-count weights per (rounded probability, month) cell
    dt_temp_adm_any_w <- merge(
      x = dt_temp_adm_any[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
      y = get_weights_by_phat_byN_month(dt_exh[DMG_died_within_365d == "1"], round_par),
      by = c("round_prob", "month_from_index"),
      all.x = TRUE,
      all.y = FALSE
    )

    cols_agg <- c("Low", "High", "any_adm")
    tab_temp <- merge(
      # unweighted data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, mean),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "0",
                          lapply(.SD, mean),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                          ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )

    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab_temp)) == 0)
    # weighting works: reweighting the decedents with their own weights must
    # reproduce their unweighted mean
    qa_tab <- merge(
      # unweighted data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, mean),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "1",
                          lapply(.SD, mean),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                          ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )[, eq := (round(V1, 4) == round(`1`, 4))]
    stopifnot(nrow(qa_tab[eq == TRUE]) == nrow(qa_tab))
    rm(qa_tab)
    # QA ends ------------d

    tab_adm_rate_agg_temp <- rbind(
      tab_adm_rate_agg_temp,
      tab_temp[, sample := sample_n]
    )
    rm(
      dt_temp_adm_any, dt_temp_adm_any_w, cols_agg, tab_temp)

    tab_adm_rate_agg_temp[, variable := factor(variable,
                                               levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_rate_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_rate_agg_temp, sample, variable)
    tab_adm_rate_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3), .SDcols = c("0", "V1", "1")]
    tab_adm_rate_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_rate_agg <- rbind(
      tab_adm_rate_agg,
      tab_adm_rate_agg_temp
    )
    rm(tab_adm_rate_agg_temp)

    # Admission numbers - agg ---------------------------------------------------a
    tab_adm_num_agg_temp <- NULL
    # record counts per patient-month and intensity
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(S_teudat_zehut_XX, month_from_index,
                     DMG_died_within_365d, prob_for_report, num_days_lived)],
      y = data.table::dcast.data.table(
        data = dt_cost[(drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                          difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .N,
                       by = .(S_teudat_zehut_XX,
                              month_from_index,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = S_teudat_zehut_XX + month_from_index ~ intensity,
        value.var = "N",
        fill = 0
      ),
      by = c("S_teudat_zehut_XX", "month_from_index"),
      all.x = FALSE, # keep only those with admissions
      all.y = TRUE   # keep only those with admissions
    )[, any_adm := Low + High]

    # QA ----------------d
    stopifnot(nrow(dt_temp_adm_any[any_adm == 0 | is.na(any_adm)]) == 0)
    # QA ends -----------d

    # add weights (proportional to decedents' days lived in each cell)
    dt_temp_adm_any_w <- merge(
      x = dt_temp_adm_any[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
      y = get_weights_by_phat_adj_month(dt_temp_adm_any[DMG_died_within_365d == "1"], round_par, "num_days_lived"),
      by = c("round_prob", "month_from_index"),
      all.x = TRUE,
      all.y = FALSE
    )

    # NOTE(review): counts are scaled to 31 days although days_lived is capped
    # at 30 above - confirm this is intended.
    cols_agg <- c("Low", "High", "any_adm")
    tab_temp <- merge(
      # unweighted, yet adjusted, data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data; cells without decedents have no weight and get 0
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "0",
                          lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                          by = .(round_prob, w = ifelse(is.na(w), 0, w)),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                          ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )
    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab_temp)) == 0)
    # weighting works
    qa_tab <- merge(
      # unweighted, yet adjusted, data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "1",
                          lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                          ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )[, eq := (round(V1, 3) == round(`1`, 3))]
    stopifnot(nrow(qa_tab[eq == TRUE]) == nrow(qa_tab))
    rm(qa_tab)
    # QA ends ------------d

    tab_adm_num_agg_temp <- rbind(
      tab_adm_num_agg_temp,
      tab_temp[, sample := sample_n]
    )
    rm(
      dt_temp_adm_any, dt_temp_adm_any_w, cols_agg, tab_temp)

    tab_adm_num_agg_temp[, variable := factor(variable,
                                              levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_num_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_num_agg_temp, sample, variable)
    tab_adm_num_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3), .SDcols = c("0", "V1", "1")]
    tab_adm_num_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_num_agg <- rbind(
      tab_adm_num_agg,
      tab_adm_num_agg_temp
    )
    rm(tab_adm_num_agg_temp)

    # Additional analyses - Admission LOS - agg ---------------------------------a
    tab_adm_LOS_agg_temp <- NULL

    # per patient-month and intensity: number of records (N_*) and total stay
    # in days (amount_*); a single stay longer than 365 days is counted as 1 day
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(S_teudat_zehut_XX, month_from_index,
                     DMG_died_within_365d, prob_for_report, num_days_lived)],
      y = data.table::dcast.data.table(
        data = dt_cost[(drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                          difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .(amount = sum(ifelse(as.integer(difftime(event_date_end, cost_date, units = "days")) > 365,
                                             as.integer(1),
                                             as.integer(difftime(event_date_end, cost_date, units = "days"))
                       )
                       ),
                       .N),
                       by = .(S_teudat_zehut_XX,
                              month_from_index,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = S_teudat_zehut_XX + month_from_index ~ intensity,
        value.var = c("N", "amount"),
        fill = 0
      ),
      by = c("S_teudat_zehut_XX", "month_from_index"),
      all.x = FALSE, # keep only those with admissions
      all.y = TRUE   # keep only those with admissions
    )[, `:=` (
      N_any_adm = N_High + N_Low,
      amount_any_adm = amount_High + amount_Low
    )]

    # QA ----------------d
    stopifnot(nrow(dt_temp_adm_any[N_any_adm == 0 | is.na(N_any_adm)]) == 0)
    stopifnot(nrow(dt_temp_adm_any[is.na(amount_High)]) == 0)

    # QA ends -----------d

    tab <- NULL
    tab_qa <- NULL
    for (varn in c("any_adm", "Low", "High")) {
      amnt <- paste0("amount_", varn)
      nAdm <- paste0("N_", varn)
      # keep only patient-months that have a record of this intensity.
      # get() is required: nAdm holds the column NAME, and comparing that
      # string with 0 is a single TRUE that would keep every row.
      dt_temp <- dt_temp_adm_any[get(nAdm) > 0]
      unweighted <- data.table::dcast.data.table(
        data = dt_temp[, .(variable = varn, V1 = sum(get(amnt), na.rm = TRUE) /
                             sum(get(nAdm), na.rm = TRUE)),
                       by = .(DMG_died_within_365d)],
        formula = variable ~ DMG_died_within_365d,
        value.var = "V1"
      )
      # weights proportional to the decedents' number of records in each cell
      dt_temp_w <- merge(
        x = dt_temp[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
        y = get_weights_by_phat_adj_month(dt_temp[DMG_died_within_365d == "1"], round_par, nAdm),
        by = c("round_prob", "month_from_index"),
        all.x = TRUE,
        all.y = FALSE
      )
      weighted <- dt_temp_w[DMG_died_within_365d == "0",
                            .(V1 = sum(get(amnt)) / sum(get(nAdm))),
                            by = .(round_prob,
                                   w = ifelse(is.na(w), 0, w))][, .(V1 = sum(V1 * w, na.rm = TRUE) / sum(w))]
      tab <- rbind(
        tab,
        cbind(unweighted, weighted)
      )

      weighted_qa <- dt_temp_w[DMG_died_within_365d == "1",
                               .(V1 = sum(get(amnt)) / sum(get(nAdm))),
                               by = .(round_prob,
                                      w = ifelse(is.na(w), 0, w))][, sum(V1 * w, na.rm = TRUE) / sum(w)]
      tab_qa <- rbind(
        tab_qa,
        cbind(unweighted, weighted_qa)
      )
      rm(unweighted, weighted, weighted_qa, dt_temp, dt_temp_w, amnt, nAdm, varn)
    }

    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab)) == 0)
    # weighting works (compared at whole-day precision)
    stopifnot(all(round(tab_qa$`1`, 0) == round(tab_qa$weighted_qa, 0)))
    rm(tab_qa)
    # QA ends ------------d

    tab_adm_LOS_agg_temp <- rbind(
      tab_adm_LOS_agg_temp,
      tab[, sample := sample_n]
    )
    rm(dt_temp_adm_any, tab)

    tab_adm_LOS_agg_temp[, variable := factor(variable,
                                              levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_LOS_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_LOS_agg_temp, sample, variable)
    tab_adm_LOS_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3),
                         .SDcols = c("0", "V1", "1")]
    tab_adm_LOS_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_LOS_agg <- rbind(
      tab_adm_LOS_agg,
      tab_adm_LOS_agg_temp
    )
  }

  # stack the three panels; spread each by last_adm_dropped so that e.g.
  # `1_TRUE` is the decedent column with the last admission dropped
  full_tab_3 <- rbind(
    data.table::dcast.data.table(data = tab_adm_rate_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_rate")
                                 ],
    data.table::dcast.data.table(data = tab_adm_num_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_num")
                                 ],
    data.table::dcast.data.table(data = tab_adm_LOS_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_LOS")
                                 ]
  )
  full_tab_3[variable == "any_adm", var_name := "All"]
  full_tab_3[variable == "Low", var_name := "   Low Intensity"]
  full_tab_3[variable == "High", var_name := "   High Intensity"]
  data.table::setcolorder(full_tab_3, c("var_name", "0_FALSE", "V1_FALSE", "1_FALSE",
                                        "1_TRUE", "sample", "variable", "type"))

  # decedent minus reweighted survivor
  full_tab_3$diff <- round(full_tab_3$`1_FALSE` - full_tab_3$`V1_FALSE`, 3)
  row_names <- full_tab_3[sample == sample_n]$var_name
  # keep columns 2-4 and 9 (`0_FALSE`, `V1_FALSE`, `1_FALSE`, diff); rows 1-3
  # (rates) are shown as percentages, rows 4-9 as plain numbers
  full_tab_3 <- bind_rows(
    full_tab_3[, -c(1, 5, 6, 7, 8)] %>%
      slice(1:3) %>%
      mutate(across(everything(), ~formatC(.x * 100, format = "f", digits = 1))),
    full_tab_3[, -c(1, 5, 6, 7, 8)] %>%
      slice(4:9) %>%
      mutate(across(everything(), ~formatC(., format = "f", digits = 1))))

  invisible(Hmisc::latex(
    full_tab_3,
    file = paste0("FE_adm_stat_agg_", sample_n, "month.tex"),
    center = 'centering',
    n.cgroup = c(2, 1, 1),
    cgroup = c("Survivor", "Decedent", "Difference"),
    colheads = c("Unweighted",
                 "\\thead{Reweighted by\\\\Decedent Risk}",
                 "",
                 "\\thead{Decedent -\\\\ Survivor\\\\(Reweighted)}"),
    extracolheads = c("(1)", "(2)", "(3)", "(4)"),
    n.rgroup = c(3, 3, 3),
    rgroup = c("A. Any admission",
               "\\thead{B. Admissions per Month\\\\~~~(if Any During the Month)}",
               "C. Length of Stay (Days)"),
    rowname = row_names,
    rowlabel = "",
    col.just = c(rep.int("r", 4)),
    na.blank = TRUE,
    extracolsize = "normalsize"
  ))
  return(full_tab_3)
}
